# Import libraries.
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
import plotly.io as pio
import networkx as nx
import warnings
warnings.simplefilter(action = 'ignore', category = FutureWarning)
import warnings
warnings.filterwarnings('ignore')
# Load the raw CSV, then work on an independent copy so the frame exactly as
# read from disk (data_in1) stays untouched.
data_in1 = pd.read_csv(
    filepath_or_buffer='Highest GDP Countries and Wages.csv',
    delimiter=',',
)
data_in = data_in1.copy(deep=True)
# Notebook display of the working frame.
data_in
| Country | Top Export | Exports | Imports | Import / Export Ratio | 2022 Population | 2022 GDP | 2022 GDP per capita | Est. 2023 Population | Est. 2023 GDP | ... | Est. 2024 GDP | Est. 2024 GDP per capita | Pop. Growth 2013-2022 | Pop. Growth / Year | Urban Pop | Fert. Rate | Med. Age | Income | Unemployment | Area | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Switzerland | Gold | 620424 | 356763 | 0.58 | 8775745 | 818426 | 93260 | 8857359 | 878442 | ... | 938458 | 105669 | 8.49 | 0.93 | 75 | 1.5 | 42 | 95490 | 4.30 | 15942 |
| 1 | Norway | Petroleum | 321076 | 107268 | 0.33 | 5329047 | 579422 | 108729 | 5373810 | 553186 | ... | 526951 | 94660 | 7.43 | 0.84 | 86 | 1.5 | 40 | 94540 | 3.23 | 148449 |
| 2 | Luxembourg | Iron | 163585 | 26068 | 0.16 | 652208 | 81530 | 125006 | 665839 | 85043 | ... | 88556 | 131384 | 20.20 | 2.09 | 88 | 1.4 | 39 | 89200 | 4.58 | 998 |
| 3 | Ireland | Blood | 723121 | 147913 | 0.20 | 5120211 | 532415 | 103983 | 5176021 | 548217 | ... | 564020 | 106059 | 10.89 | 1.09 | 64 | 1.8 | 38 | 79730 | 4.48 | 27458 |
| 4 | United States | Petroleum | 3011859 | 3375948 | 1.12 | 337273680 | 25744100 | 76330 | 339331049 | 27262591 | ... | 28781083 | 85373 | 4.41 | 0.61 | 83 | 1.7 | 38 | 76770 | 3.65 | 3809525 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 58 | Pakistan | Clothing | 38700 | 71105 | 1.84 | 205661422 | 326796 | 1589 | 210494465 | 332516 | ... | 338237 | 1461 | 23.30 | 2.35 | 35 | 3.3 | 21 | 1560 | 5.60 | 307373 |
| 59 | Nepal | Soybean oil | 2722 | 13716 | 5.04 | 29473448 | 39406 | 1337 | 29841866 | 41792 | ... | 44179 | 1397 | 13.49 | 1.25 | 22 | 2.0 | 24 | 1340 | 10.92 | 56827 |
| 60 | Sudan | Gold | 5908 | 7448 | 1.26 | 33329401 | 36729 | 1102 | 34202631 | 31797 | ... | 26865 | 547 | 26.44 | 2.62 | 35 | 4.3 | 19 | 760 | 17.59 | 710689 |
| 61 | Somalia | Goats | 1363 | 3519 | 2.58 | 17601351 | 10420 | 592 | 18182195 | 11612 | ... | 12804 | 776 | 34.71 | 3.30 | 46 | 6.1 | 15 | 600 | 19.29 | 246199 |
| 62 | Afghanistan | Gold | 1476 | 4689 | 3.18 | 39814606 | 14174 | 356 | 40937377 | 14320 | ... | 14467 | 422 | 27.45 | 2.82 | 26 | 4.4 | 17 | 380 | 14.10 | 252072 |
63 rows × 22 columns
# Print a summary of the dataset: column names, non-null counts, dtypes and
# memory usage.
data_in.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 63 entries, 0 to 62 Data columns (total 22 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Country 63 non-null object 1 Top Export 63 non-null object 2 Exports 63 non-null int64 3 Imports 63 non-null int64 4 Import / Export Ratio 63 non-null float64 5 2022 Population 63 non-null int64 6 2022 GDP 63 non-null int64 7 2022 GDP per capita 63 non-null int64 8 Est. 2023 Population 63 non-null int64 9 Est. 2023 GDP 63 non-null int64 10 Est. 2023 GDP per capita 63 non-null int64 11 Est. 2024 Population 63 non-null int64 12 Est. 2024 GDP 63 non-null int64 13 Est. 2024 GDP per capita 63 non-null int64 14 Pop. Growth 2013-2022 63 non-null float64 15 Pop. Growth / Year 63 non-null float64 16 Urban Pop 63 non-null int64 17 Fert. Rate 63 non-null float64 18 Med. Age 63 non-null int64 19 Income 63 non-null int64 20 Unemployment 63 non-null float64 21 Area 63 non-null int64 dtypes: float64(5), int64(15), object(2) memory usage: 11.0+ KB
# Reduce the dataset to the columns used in the analysis and reposition them.
# Columns are selected by name rather than by position so the selection stays
# correct (or fails loudly with a KeyError) if the CSV column order changes.
# '2022 GDP' is deliberately placed ahead of '2022 Population'.
df1 = data_in[
    [
        'Country',
        '2022 GDP',
        '2022 Population',
        'Pop. Growth / Year',
        'Urban Pop',
        'Fert. Rate',
        'Med. Age',
        'Income',
        'Unemployment',
        'Area',
    ]
].copy()
# Notebook display of the reduced frame.
df1
| Country | 2022 GDP | 2022 Population | Pop. Growth / Year | Urban Pop | Fert. Rate | Med. Age | Income | Unemployment | Area | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Switzerland | 818426 | 8775745 | 0.93 | 75 | 1.5 | 42 | 95490 | 4.30 | 15942 |
| 1 | Norway | 579422 | 5329047 | 0.84 | 86 | 1.5 | 40 | 94540 | 3.23 | 148449 |
| 2 | Luxembourg | 81530 | 652208 | 2.09 | 88 | 1.4 | 39 | 89200 | 4.58 | 998 |
| 3 | Ireland | 532415 | 5120211 | 1.09 | 64 | 1.8 | 38 | 79730 | 4.48 | 27458 |
| 4 | United States | 25744100 | 337273680 | 0.61 | 83 | 1.7 | 38 | 76770 | 3.65 | 3809525 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 58 | Pakistan | 326796 | 205661422 | 2.35 | 35 | 3.3 | 21 | 1560 | 5.60 | 307373 |
| 59 | Nepal | 39406 | 29473448 | 1.25 | 22 | 2.0 | 24 | 1340 | 10.92 | 56827 |
| 60 | Sudan | 36729 | 33329401 | 2.62 | 35 | 4.3 | 19 | 760 | 17.59 | 710689 |
| 61 | Somalia | 10420 | 17601351 | 3.30 | 46 | 6.1 | 15 | 600 | 19.29 | 246199 |
| 62 | Afghanistan | 14174 | 39814606 | 2.82 | 26 | 4.4 | 17 | 380 | 14.10 | 252072 |
63 rows × 10 columns
# Show summary statistics (count, mean, std, min, quartiles, max) for the
# numeric columns of the reduced dataset.
df1.describe()
| 2022 GDP | 2022 Population | Pop. Growth / Year | Urban Pop | Fert. Rate | Med. Age | Income | Unemployment | Area | |
|---|---|---|---|---|---|---|---|---|---|
| count | 6.300000e+01 | 6.300000e+01 | 63.000000 | 63.000000 | 63.000000 | 63.000000 | 63.000000 | 63.000000 | 6.300000e+01 |
| mean | 1.446446e+06 | 9.929541e+07 | 0.902857 | 67.539683 | 2.047619 | 34.650794 | 26517.460317 | 6.474921 | 5.797850e+05 |
| std | 3.919106e+06 | 2.519857e+08 | 1.081826 | 20.319882 | 0.967514 | 8.719941 | 27606.832719 | 4.871535 | 1.217321e+06 |
| min | 1.042000e+04 | 6.522080e+05 | -1.200000 | 19.000000 | 1.200000 | 15.000000 | 380.000000 | 0.230000 | 9.980000e+02 |
| 25% | 1.026310e+05 | 9.499294e+06 | 0.295000 | 54.000000 | 1.500000 | 28.500000 | 4055.000000 | 3.640000 | 3.399800e+04 |
| 50% | 4.088020e+05 | 3.332940e+07 | 0.900000 | 71.000000 | 1.700000 | 37.000000 | 12750.000000 | 4.980000 | 1.306890e+05 |
| 75% | 1.058088e+06 | 7.768432e+07 | 1.520000 | 84.500000 | 2.050000 | 41.500000 | 49200.000000 | 7.730000 | 4.044750e+05 |
| max | 2.574410e+07 | 1.437387e+09 | 3.410000 | 99.000000 | 6.100000 | 49.000000 | 95490.000000 | 28.840000 | 6.592812e+06 |
# Draw the scatter plot / histogram matrix for the reduced dataset to get a
# first visual impression of how the columns relate to each other.
sns.pairplot(df1)
<seaborn.axisgrid.PairGrid at 0x24971b34700>
# Countries whose median age is 43 or above, taken as an independent copy of
# the matching rows.
age_high = df1[df1['Med. Age'].ge(43)].copy()
# Notebook display of the selection.
age_high
| Country | 2022 GDP | 2022 Population | Pop. Growth / Year | Urban Pop | Fert. Rate | Med. Age | Income | Unemployment | Area | |
|---|---|---|---|---|---|---|---|---|---|---|
| 9 | Austria | 470302 | 9029509 | 0.70 | 59 | 1.5 | 43 | 55720 | 4.99 | 32388 |
| 11 | Finland | 282511 | 5553369 | 0.26 | 87 | 1.4 | 43 | 54890 | 6.72 | 130689 |
| 12 | Germany | 4076923 | 83684120 | 0.41 | 77 | 1.5 | 45 | 54030 | 3.14 | 138068 |
| 19 | Japan | 4232173 | 124413469 | -0.20 | 94 | 1.3 | 49 | 42550 | 2.60 | 145934 |
| 20 | Italy | 2046952 | 58861053 | -0.10 | 72 | 1.3 | 48 | 38200 | 8.07 | 116629 |
| 21 | Spain | 1415874 | 47712687 | 0.21 | 80 | 1.3 | 45 | 32090 | 12.92 | 195360 |
| 23 | Portugal | 254849 | 10395635 | -0.10 | 67 | 1.4 | 46 | 25950 | 6.01 | 35608 |
| 24 | Lithuania | 70878 | 2827767 | -0.53 | 71 | 1.6 | 44 | 23870 | 5.96 | 25207 |
| 25 | Latvia | 40876 | 1876767 | -0.79 | 69 | 1.6 | 44 | 21850 | 6.81 | 24940 |
| 26 | Greece | 217285 | 10412852 | -0.57 | 86 | 1.4 | 45 | 21810 | 12.43 | 50984 |
| 29 | Bulgaria | 90213 | 6455775 | -1.20 | 78 | 1.6 | 45 | 13350 | 4.27 | 42614 |
| 35 | Serbia | 63563 | 6664185 | -0.77 | 69 | 1.5 | 43 | 9290 | 8.68 | 29957 |
# Countries whose median age is 24 or below, taken as an independent copy of
# the matching rows.
age_low = df1[df1['Med. Age'].le(24)].copy()
# Notebook display of the selection.
age_low
| Country | 2022 GDP | 2022 Population | Pop. Growth / Year | Urban Pop | Fert. Rate | Med. Age | Income | Unemployment | Area | |
|---|---|---|---|---|---|---|---|---|---|---|
| 44 | Iraq | 264182 | 44497557 | 3.41 | 71 | 3.4 | 20 | 5270 | 15.32 | 167974 |
| 46 | Egypt | 409306 | 95298253 | 2.55 | 41 | 2.8 | 24 | 4100 | 6.40 | 384788 |
| 52 | Bolivia | 44008 | 12224444 | 1.65 | 69 | 2.5 | 24 | 3490 | 3.55 | 424162 |
| 55 | Nigeria | 475058 | 219629218 | 2.71 | 54 | 5.1 | 17 | 2160 | 3.83 | 356667 |
| 57 | Cameroon | 44341 | 28351023 | 2.65 | 58 | 4.3 | 18 | 1640 | 3.78 | 179942 |
| 58 | Pakistan | 326796 | 205661422 | 2.35 | 35 | 3.3 | 21 | 1560 | 5.60 | 307373 |
| 59 | Nepal | 39406 | 29473448 | 1.25 | 22 | 2.0 | 24 | 1340 | 10.92 | 56827 |
| 60 | Sudan | 36729 | 33329401 | 2.62 | 35 | 4.3 | 19 | 760 | 17.59 | 710689 |
| 61 | Somalia | 10420 | 17601351 | 3.30 | 46 | 6.1 | 15 | 600 | 19.29 | 246199 |
| 62 | Afghanistan | 14174 | 39814606 | 2.82 | 26 | 4.4 | 17 | 380 | 14.10 | 252072 |
# Build one ascending-sorted Series per column for each age group
# (x_* = high median age, y_* = low median age).
# NOTE: every Series is sorted on its own values, so element i of one Series
# does not necessarily belong to the same country as element i of another.
x_fert, x_age, x_income, x_country = (
    age_high[column].sort_values()
    for column in ('Fert. Rate', 'Med. Age', 'Income', 'Country')
)
y_fert, y_age, y_income, y_country = (
    age_low[column].sort_values()
    for column in ('Fert. Rate', 'Med. Age', 'Income', 'Country')
)
# Build the 2x2 figure: top row = countries with the highest median age
# (age_high), bottom row = countries with the lowest median age (age_low);
# left column = fertility rate, right column = income.
def _build_median_age_figure(oldest, youngest):
    """Return a 2x2 Plotly figure of median age against fertility rate and income.

    Each subplot sorts the *rows* of its frame by the x column before plotting,
    so every point shows the x value and the median age of one and the same
    country. (Sorting the x and y columns independently would pair the i-th
    smallest x with the i-th smallest age and draw points that match no
    country.)

    Args:
        oldest: DataFrame drawn in the top row of subplots.
        youngest: DataFrame drawn in the bottom row of subplots.
            Both frames must contain the columns 'Fert. Rate', 'Income' and
            'Med. Age'.

    Returns:
        The populated and styled figure.
    """
    age_column = 'Med. Age'
    title_font = dict(size=30, family='tekton pro', color='darkslateblue')
    # The '<b>' tick prefix is kept exactly as before; presumably it is there
    # to render the tick labels in bold.
    tick_style = dict(tickfont=dict(color='darkorange'), tickprefix='<b>')
    # (x column, x-axis title, position of the value labels, subplot column)
    panels = (
        ('Fert. Rate', '<b>Fertility Rate</b>', 'bottom right', 1),
        ('Income', '<b>Income</b>', 'top left', 2),
    )

    fig = make_subplots(rows=2, cols=2)
    for row, frame in enumerate((oldest, youngest), start=1):
        for x_column, x_title, text_position, col in panels:
            # Secondary sort on age keeps the line order deterministic when
            # several countries share the same x value.
            ordered = frame.sort_values(by=[x_column, age_column])
            fig.add_trace(
                go.Scatter(
                    x=ordered[x_column],
                    y=ordered[age_column],
                    text=ordered[age_column],
                    mode='lines+markers+text',
                    textfont=dict(color='black', weight='bold', size=11),
                    textposition=text_position,
                ),
                row=row, col=col,
            )
            fig.update_xaxes(title_text=x_title, title_font=title_font,
                             row=row, col=col, **tick_style)
            # Only the left-hand subplots carry a y-axis title.
            y_title = (
                dict(title_text='<b>Median Age</b>', title_font=title_font)
                if col == 1 else {}
            )
            fig.update_yaxes(row=row, col=col, **y_title, **tick_style)

    # Axis settings carried over unchanged from the original styling.
    fig.update_xaxes(tickcolor='red', row=1, col=1)
    fig.update_yaxes(title_font_color='red', row=2, col=2)

    # Change size of figure and add title (bold tag now properly closed).
    fig.update_layout(
        height=800,
        width=1200,
        title_text=" <b> Minimum / Maximum Median Age by Income and Fertility Rate</b>",
        font_family="Courier New",
        font_size=22,
        title_font_family="Cursive",
        title_font_color="crimson",
        showlegend=False,
    )
    return fig


fig1 = _build_median_age_figure(age_high, age_low)
fig1.show()
# Apply Matplotlib's 'fivethirtyeight' style sheet to the figures created
# from here on.
plt.style.use('fivethirtyeight')
# Function to annotate with value labels and arced arrows.
def arrow_labels(ax, x, y, label, k):
    """Annotate data points with text labels connected by curved arrows.

    For every point a pair of graph nodes is created: a "data" node and a
    "label" node, both starting at the point's coordinates and joined by an
    edge. ``networkx.spring_layout`` is then run with the data nodes held
    fixed, and the resulting label-node positions are used as the text
    positions of the annotations.

    Nodes are keyed by the point's index, not by its label text, so points
    that share the same label each get their own annotation.

    Args:
        ax: Matplotlib axes to draw the annotations on.
        x: Iterable of x coordinates of the data points.
        y: Iterable of y coordinates of the data points.
        label: Iterable of label texts, one per data point.
        k: Forwarded to ``networkx.spring_layout`` as its ``k`` argument.

    Returns:
        None. The annotations are added to ``ax``.
    """
    graph = nx.DiGraph()
    data_nodes = []
    init_pos = {}
    texts = {}
    for index, (xi, yi, text) in enumerate(zip(x, y, label)):
        data_node = ('data', index)
        label_node = ('label', index)
        graph.add_node(data_node)
        graph.add_node(label_node)
        graph.add_edge(label_node, data_node)
        data_nodes.append(data_node)
        # Both nodes start on the data point; only the label node may move.
        init_pos[data_node] = (xi, yi)
        init_pos[label_node] = (xi, yi)
        texts[label_node] = text

    if not data_nodes:
        # Nothing to lay out or annotate.
        return

    pos = nx.spring_layout(graph, pos=init_pos, fixed=data_nodes, k=k)

    for label_node, data_node in graph.edges():
        ax.annotate(
            texts[label_node],
            xy=pos[data_node], xycoords='data',
            xytext=pos[label_node], textcoords='data',
            arrowprops=dict(
                arrowstyle="->",
                shrinkA=-5, shrinkB=-17,
                connectionstyle="arc3, rad = 0.5",
                color='red',
            ),
            fontsize=20,
            color='blue',
        )
# Rank the countries by unemployment (highest first) and keep the top 10.
x_vals0 = df1[['Country', 'Unemployment', 'Med. Age']].sort_values(
    by='Unemployment', ascending=False)
x_vals1 = x_vals0['Unemployment'].head(10).copy()
y_vals1 = x_vals0['Med. Age'].head(10).copy()
vals1 = x_vals0['Country'].head(10).copy()
lst1 = vals1.tolist()

# Create the figure at its final size (width 25, height 18 inches).
fig2, ax = plt.subplots(figsize=(25, 18))

# Specify plot parameters.
# Matplotlib expects a scalar label for a single line, so the country list is
# passed in its string form (the same text a legend would show for the list).
ax.plot(x_vals1, y_vals1, color='green', marker='*', linestyle='dashed',
        linewidth=2, markersize=30, mfc='orange', mec='blue',
        label=str(lst1))

# Set axis limits (fixed values; presumably chosen to frame the current
# top-10 data with room for the arrow labels).
ax.set_xlim(9, 31)
ax.set_ylim(13, 45.5)

# Add x and y axis labels.
ax.set_xlabel('Unemployment (%)', labelpad=25, fontsize=28, color='red')
ax.set_ylabel('Median Age', labelpad=25, fontsize=28, color='red')

# Change the font size of the x and y tick labels on this axes.
ax.tick_params(axis='both', labelsize=20)

# suptitle() returns the title's Text object, so its result is not assigned
# back to fig2; fig2 must keep referring to the Figure.
fig2.suptitle("Top 10 Countries by Unemployment Rate and Median Age",
              fontsize=40, color='darkolivegreen')

# Label every point with its country name.
x_lst1 = x_vals1.tolist()
y_lst1 = y_vals1.tolist()
arrow_labels(ax, x_lst1, y_lst1, lst1, k=.835)

# Save through the figure itself rather than pyplot's "current figure".
fig2.savefig('Top 10 Countries by Unemployment & Median Age.png')
plt.show()